Aufgabe: Vorhersage der Umsätze vom 9.6.2019 bis 30.07.2019
Warengruppen: * 1 = Brot * 2 = Brötchen * 3 = Croissant * 4 = Konditorei * 5 = Kuchen * 6 = Saisonbrot
Wetterdaten: * Mittlerer Bewölkungsgrad am Tag (0 = min, 8 = max) * MIttlere Temperatur in C * Mittlere Windgeschwindigkeit in m/s * Wettercode (http://www.seewetter-kiel.de/seewetter/daten_symbole.htm) * und in der Datei wettercodes.Rda
remove(list = ls())
# Create list with needed libraries
# Quellen:
# 1. synthpop: https://cran.r-project.org/web/packages/synthpop/vignettes/synthpop.pdf
# 2.
pkgs <- c("lubridate", "stringr","tidyverse", "readr",
"fastDummies", "reticulate", "ggplot2", "Metrics", "VIM", "synthpop", "httr")
# Load each listed library and check if it is installed and install if necessary
for (pkg in pkgs) {
if (!require(pkg, character.only = TRUE)) {
install.packages(pkg)
library(pkg, character.only = TRUE)
}
}
Skripte, in denen die Daten aufbereitet wurden: - Wetterdaten –> “Datenaufbereitung_Wetter.Rmd” - Feiertagedaten –> “Datenaufbereitung_Feiertage.R” - Schulferien –> “Datenaufbereitung_Schulferien.R” - Umsatzdaten –> “Datenaufbereitung_Umsatz.R” - Umsatzdaten aus dem Facheinzelhandel –> “??”
# Lade Daten
load("pj_wetter_dummy.Rda")
pj_wetter <- pj_wetter_dummy
load("kiwoDT.Rda")
pj_kiwo <- kiwoDT
load("pj_umsatz.Rda")
load("schulferien.Rda")
pj_schulferien <- schulferien
load("umsatzFachEinzelHandelSH.Rda")
# Erste Betrachtung der Daten
#summary(pj_wetter)
#summary(pj_kiwo)
#summary(pj_umsatz)
# Merge erstellt automatisch die Schnittmenge
# Der Zusatz all.x = TRUE sorgt dafür, dass keine Zeilen (basierend auf Datensatz x) weggelöscht werden
# Wetterdaten nach Datum hinzufügen
pj_umsatz_wetter <- merge(pj_umsatz, pj_wetter, by="Datum", all.x = TRUE)
# Schulferien nach Datum hinzufügen
pj_umsatz_wetter_ferien <- merge(pj_umsatz_wetter, pj_schulferien, by="Datum", all.x = TRUE)
# KiWo nach Datum hinzufügen
allData <- merge(pj_umsatz_wetter_ferien, pj_kiwo, by="Datum", all.x = TRUE)
allData <- merge(allData, umsatzFachEinzelHandelSH, by="Datum", all.x = TRUE)
# auf fehlende Werte überprüfen:
allData_na <- allData %>%
aggr(combined=TRUE, numbers=TRUE)
Warning: not enough horizontal space to display frequencies
# Imputation Temperatur und Windstaerke
# Aktuell: "Datenspende" vom Wert vom Vortag
# ZIEL: Mittelwert aus Temperatur von Vortag und Tag danach -> Armando! :)
allData <- allData %>%
hotdeck(variable = c("Temperatur", "Windstaerke"),
ord_var = "Datum")
#imputierte Werte graphisch überprüfen:
ggplot(allData) +
geom_point(aes(x = Datum, y = Temperatur, color = Temperatur_imp))
ggplot(allData) +
geom_point(aes(x = Datum, y = Windstaerke, color = Windstaerke_imp))
# NA Wettercodes zu 0, da Spalte WC_NA angibt, wo Wettercodes gefehlt haben
# Spalten 12 -24
# das gleiche gilt bei der Bewölkung
# Spalten 26 - 29
# weitere NA mit 0 füllen, dort wo es Sinn ergibt
allData <- allData %>%
mutate_at(c(12:34), ~replace(., is.na(.), 0))
# generating synthetic data
synthpop_allData <- syn(allData)[["syn"]]
Variable(s): Wochentag have been changed for synthesis from character to factor.
Warning: In your synthesis there are numeric variables with 5 or fewer levels: WC_Bewölkung_nicht_beobachtet, WC_Bewölkung_zunehmend, WC_Dunst_Staub, WC_Ereignisse_letzte_h, WC_Gewitter, WC_Nebel_Eisnebel, WC_Regen, WC_Schnee, WC_Sprühregen, WC_Trockenereignisse, WC_NA, Bewoelkungsgrad_gering, Bewoelkungsgrad_keine, Bewoelkungsgrad_mittel, Bewoelkungsgrad_stark, Bewoelkungsgrad_NA, Schulferien, KielerWoche, Temperatur_imp.
Consider changing them to factors. You can do it using parameter 'minnumlevels'.
Variable(s): WC_Bewölkung_abnehmend, WC_Bewölkung_gleichbleibend, WC_Schauer numeric but with only 1 or fewer distinct values turned into factor(s) for synthesis.
Variable WC_Bewölkung_abnehmend has only one value so its method has been changed to "constant".
Variable WC_Bewölkung_abnehmend removed as predictor because only one value.
Variable WC_Bewölkung_gleichbleibend has only one value so its method has been changed to "constant".
Variable WC_Bewölkung_gleichbleibend removed as predictor because only one value.
Variable WC_Schauer has only one value so its method has been changed to "constant".
Variable WC_Schauer removed as predictor because only one value.
Variables Temperatur_imp, Windstaerke_imp are collinear. Variables later in 'visit.sequence'
are derived from Temperatur_imp.
Synthesis
-----------
Datum Brot Brötchen Croissant Konditorei Kuchen Saisonbrot Wochentag Konditorei_imp Windstaerke
Temperatur WC_Bewölkung_abnehmend WC_Bewölkung_gleichbleibend WC_Bewölkung_nicht_beobachtet WC_Bewölkung_zunehmend WC_Dunst_Staub WC_Ereignisse_letzte_h WC_Gewitter WC_Nebel_Eisnebel WC_Regen
WC_Schauer WC_Schnee WC_Sprühregen WC_Trockenereignisse WC_NA Bewoelkungsgrad_gering Bewoelkungsgrad_keine Bewoelkungsgrad_mittel Bewoelkungsgrad_stark Bewoelkungsgrad_NA
Schulferien KielerWoche UmsatzFEH Temperatur_imp Windstaerke_imp
# dummy coding der Wochentage
allData_dummy <- dummy_cols(allData, select_columns = "Wochentag")
synthpop_allData_dummy <- dummy_cols(synthpop_allData, select_columns = "Wochentag")
allData_dummy$year <- year(allData_dummy$Datum)
allData_dummy$month <- month(allData_dummy$Datum)
allData_dummy$day <- day(allData_dummy$Datum)
synthpop_allData_dummy$year <- year(synthpop_allData_dummy$Datum)
synthpop_allData_dummy$month <- month(synthpop_allData_dummy$Datum)
synthpop_allData_dummy$day <- day(synthpop_allData_dummy$Datum)
save(allData_dummy, file="projectData_dummy_D.Rda")
save(synthpop_allData_dummy, file = "projectSynthpopData_dummy_D.Rda")
allData_dummy$Datum <- NULL
synthpop_allData_dummy$Datum <- NULL
#summary(allData_dummy)
save(allData_dummy, file="projectData_dummy.Rda")
save(synthpop_allData_dummy, file = "projectSynthpopData_dummy.Rda")
# Erstelle einen leeren Dataframe mit einer Spalte für das Datum
testDatenSatz <- data.frame(Datum = character())
# Erstelle eine Sequenz von Daten im angegebenen Zeitraum
datum_sequenz <- seq(from = as.Date("2019-06-09"),
to = as.Date("2019-07-30"),
by = "days")
# Füge die Daten der Sequenz dem Dataframe hinzu
sBrot <- select(pj_umsatz, "Datum", "Saisonbrot")
testDatenSatz <- rbind(testDatenSatz, data.frame(Datum = datum_sequenz))
testDatenSatz$Wochentag <- weekdays(testDatenSatz$Datum)
testDatenSatz <- merge(testDatenSatz, pj_wetter, by="Datum", all.x = TRUE)
testDatenSatz <- merge(testDatenSatz, pj_schulferien, by="Datum", all.x = TRUE)
testDatenSatz <- merge(testDatenSatz, pj_kiwo, by="Datum", all.x = TRUE)
testDatenSatz <- merge(testDatenSatz, sBrot, by="Datum", all.x = TRUE)
testDatenSatz <- merge(testDatenSatz, umsatzFachEinzelHandelSH, by="Datum", all.x = TRUE)
testDatenSatz <- testDatenSatz %>%
hotdeck(variable = c("Temperatur", "Windstaerke"),
ord_var = "Datum")
#imputierte Werte von testDatenSatz graphisch überprüfen:
ggplot(testDatenSatz) +
geom_point(aes(x = Datum, y = Temperatur, color = Temperatur_imp))
ggplot(testDatenSatz) +
geom_point(aes(x = Datum, y = Windstaerke, color = Windstaerke_imp))
testDatenSatz <- testDatenSatz %>%
mutate_at(c(4:26), ~replace(., is.na(.), 0))
# dummy coding der Wochentage
testDatenSatz <- dummy_cols(testDatenSatz, select_columns = "Wochentag")
testDatenSatz$year <- year(testDatenSatz$Datum)
testDatenSatz$month <- month(testDatenSatz$Datum)
testDatenSatz$day <- day(testDatenSatz$Datum)
testDatenSatz$Datum <- NULL
testDatenSatz$Wochentag <- NULL
summary(testDatenSatz)
Windstaerke Temperatur WC_Bewölkung_abnehmend WC_Bewölkung_gleichbleibend
Min. :3.000 Min. :14.46 Min. :0 Min. :0
1st Qu.:5.000 1st Qu.:16.93 1st Qu.:0 1st Qu.:0
Median :6.000 Median :19.59 Median :0 Median :0
Mean :5.788 Mean :20.41 Mean :0 Mean :0
3rd Qu.:7.000 3rd Qu.:23.40 3rd Qu.:0 3rd Qu.:0
Max. :9.000 Max. :29.73 Max. :0 Max. :0
WC_Bewölkung_nicht_beobachtet WC_Bewölkung_zunehmend WC_Dunst_Staub WC_Ereignisse_letzte_h WC_Gewitter
Min. :0.0000 Min. :0 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0 Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.1538 Mean :0 Mean :0.1346 Mean :0.1538 Mean :0.1154
3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :0 Max. :1.0000 Max. :1.0000 Max. :1.0000
WC_Nebel_Eisnebel WC_Regen WC_Schauer WC_Schnee WC_Sprühregen WC_Trockenereignisse
Min. :0 Min. :0.0000 Min. :0 Min. :0 Min. :0.00000 Min. :0.00000
1st Qu.:0 1st Qu.:0.0000 1st Qu.:0 1st Qu.:0 1st Qu.:0.00000 1st Qu.:0.00000
Median :0 Median :0.0000 Median :0 Median :0 Median :0.00000 Median :0.00000
Mean :0 Mean :0.1731 Mean :0 Mean :0 Mean :0.01923 Mean :0.01923
3rd Qu.:0 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.00000 3rd Qu.:0.00000
Max. :0 Max. :1.0000 Max. :0 Max. :0 Max. :1.00000 Max. :1.00000
WC_NA Bewoelkungsgrad_gering Bewoelkungsgrad_keine Bewoelkungsgrad_mittel Bewoelkungsgrad_stark
Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.2308 Mean :0.05769 Mean :0.1346 Mean :0.4615 Mean :0.3462
3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
Bewoelkungsgrad_NA Schulferien KielerWoche Saisonbrot UmsatzFEH Temperatur_imp
Min. :0 Min. :0.0000 Min. :0.0000 Min. :0 Min. :118.5 Mode :logical
1st Qu.:0 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0 1st Qu.:118.5 FALSE:52
Median :0 Median :1.0000 Median :0.0000 Median :0 Median :122.4
Mean :0 Mean :0.5769 Mean :0.1731 Mean :0 Mean :120.8
3rd Qu.:0 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0 3rd Qu.:122.4
Max. :0 Max. :1.0000 Max. :1.0000 Max. :0 Max. :122.4
Windstaerke_imp Wochentag_Friday Wochentag_Monday Wochentag_Saturday Wochentag_Sunday Wochentag_Thursday
Mode :logical Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
FALSE:52 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
Mean :0.1346 Mean :0.1538 Mean :0.1346 Mean :0.1538 Mean :0.1346
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
Wochentag_Tuesday Wochentag_Wednesday year month day
Min. :0.0000 Min. :0.0000 Min. :2019 Min. :6.000 Min. : 1.00
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:2019 1st Qu.:6.000 1st Qu.:11.00
Median :0.0000 Median :0.0000 Median :2019 Median :7.000 Median :17.50
Mean :0.1538 Mean :0.1346 Mean :2019 Mean :6.577 Mean :17.19
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:2019 3rd Qu.:7.000 3rd Qu.:24.00
Max. :1.0000 Max. :1.0000 Max. :2019 Max. :7.000 Max. :30.00
save(testDatenSatz, file="Datenaufbereitung_Testdaten.Rda")
features <- c(#"day", "month", "year",
"Windstaerke", "Temperatur", "WC_Bewölkung_abnehmend",
"WC_Bewölkung_gleichbleibend", "WC_Bewölkung_nicht_beobachtet", "WC_Bewölkung_zunehmend",
"WC_Dunst_Staub", "WC_Ereignisse_letzte_h", "WC_Gewitter",
"WC_Nebel_Eisnebel", "WC_Regen", "WC_Schauer",
"WC_Schnee", "WC_Sprühregen", "WC_Trockenereignisse",
"WC_NA", "Bewoelkungsgrad_gering", "Bewoelkungsgrad_keine",
"Bewoelkungsgrad_mittel", "Bewoelkungsgrad_stark", "Bewoelkungsgrad_NA",
"Schulferien", "KielerWoche"
#"Wochentag_Tuesday",
#"Wochentag_Thursday", #"Saisonbrot", "UmsatzFEH",
#"Wochentag_Friday", "Wochentag_Wednesday", "Wochentag_Monday",
#"Wochentag_Saturday", "Wochentag_Sunday"
)
labels <- c("Brot", "Brötchen", "Croissant", "Konditorei", "Kuchen")
# Setting the random counter to a fixed value, so the random initialization stays the same (the random split is always the same)
set.seed(1)
assignment <- sample(1:3, size = nrow(allData_dummy), prob = c(.7, .2, .1), replace = TRUE)
allData_dummy2 <- rbind(allData_dummy[assignment == 1,], synthpop_allData_dummy)
training_features <- allData_dummy2[,features]
training_labels <- allData_dummy2[,labels]
validation_features <- allData_dummy[assignment == 2, features]
validation_labels <- allData_dummy[assignment == 2, labels]
t_features <- allData_dummy[assignment == 3, features]
t_labels <- allData_dummy[assignment == 3, labels]
testing_features <- testDatenSatz %>%
select(all_of(features))
#are there any missing values?
table(is.na(training_features))
FALSE
82915
table(is.na(validation_features))
FALSE
9614
table(is.na(testing_features))
FALSE
1196
#summary(allData_dummy)
reticulate::repl_python()
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
# The argument "input_shape" for the definition of the input layer must include
# the number of input variables (features) used for the model.
# To automatically calculate this number we use the function `r.training_features.keys()`,
# which returns the list of variable names of the dataframe `training_features`.
# Then, the funtion `len()` returns the length of this list of variable names
# (i.e. the number of variables in the input)
model = Sequential([
InputLayer(input_shape = (len(r.training_features.keys()), )),
BatchNormalization(),
Dense(len(r.training_features.keys()), activation = 'swish'),
Dropout(0.2),
Dense(len(r.training_features.keys()), activation = 'swish'),
Dense(5)
])
# Ausgabe einer ZUsammenfassung zur Form des MOdells, das geschätzt wird (nicht notwendig)
#model.summary()
# definition of the loss function and the optimazation function with hyperparameters
model.compile(loss="mape", optimizer=Adam(learning_rate=0.001))
#Schätzung des Modells
history = model.fit(r.training_features, r.training_labels, epochs = 300,
validation_data = (r.validation_features, r.validation_labels), verbose = 0)
model.save("python_model.h5")
quit
# Graphische Ausgabe der Modelloptimierung
#create data
data <- data.frame(val_loss = unlist(py$history$history$val_loss),
loss = unlist(py$history$history$loss))
ggplot(data[-(1:10), ])+
geom_line(aes(x = 1:length(val_loss), y = val_loss, colour = "Validation Loss")) +
geom_line(aes(x = 1:length(loss), y = loss, colour = "Training Loss")) +
scale_colour_manual(values = c("Training Loss"="blue", "Validation Loss" = "red")) +
labs(title = "Loss Function Values During Optimazation") +
xlab("Iteration Number") +
ylab("Loss")
NA
NA
# Schätzung der (normierten) Preise für die Trainings- und Testdaten
training_predictions <- py$model$predict(training_features)
1/113 [..............................] - ETA: 3s
113/113 [==============================] - 0s 328us/step
validation_predictions <- py$model$predict(validation_features)
1/14 [=>............................] - ETA: 0s
14/14 [==============================] - 0s 289us/step
testing_predictions <- py$model$predict(testing_features)
1/2 [==============>...............] - ETA: 0s
2/2 [==============================] - 0s 381us/step
t_predictions <- py$model$predict(t_features)
1/7 [===>..........................] - ETA: 0s
7/7 [==============================] - 0s 313us/step
a <- format(mape(training_labels[,1], training_predictions[,1])*100, digits=3, nsmall=2)
b <- format(mape(training_labels[,2], training_predictions[,2])*100, digits=3, nsmall=2)
c <- format(mape(training_labels[,3], training_predictions[,3])*100, digits=3, nsmall=2)
d <- format(mape(training_labels[,4], training_predictions[,4])*100, digits=3, nsmall=2)
e <- format(mape(training_labels[,5], training_predictions[,5])*100, digits=3, nsmall=2)
cat(paste0("\nMAPE on the Training Data1:\t", a))
MAPE on the Training Data1: 26.70
cat(paste0("\nMAPE on the Training Data2:\t", b))
MAPE on the Training Data2: 17.94
cat(paste0("\nMAPE on the Training Data3:\t", c))
MAPE on the Training Data3: 23.69
cat(paste0("\nMAPE on the Training Data4:\t", d))
MAPE on the Training Data4: 23.44
cat(paste0("\nMAPE on the Training Data5:\t", e, "\n"))
MAPE on the Training Data5: 16.02
g <- format(mape(validation_labels[,1], validation_predictions[,1])*100, digits=3, nsmall=2)
h <- format(mape(validation_labels[,2], validation_predictions[,2])*100, digits=3, nsmall=2)
i <- format(mape(validation_labels[,3], validation_predictions[,3])*100, digits=3, nsmall=2)
j <- format(mape(validation_labels[,4], validation_predictions[,4])*100, digits=3, nsmall=2)
k <- format(mape(validation_labels[,5], validation_predictions[,5])*100, digits=3, nsmall=2)
cat(paste0("\nMAPE on the Validation Data1:\t", g))
MAPE on the Validation Data1: 25.48
cat(paste0("\nMAPE on the Validation Data2:\t", h))
MAPE on the Validation Data2: 18.23
cat(paste0("\nMAPE on the Validation Data3:\t", i))
MAPE on the Validation Data3: 25.02
cat(paste0("\nMAPE on the Validation Data4:\t", j))
MAPE on the Validation Data4: 22.25
cat(paste0("\nMAPE on the Validation Data5:\t", k, "\n"))
MAPE on the Validation Data5: 17.91
l <- format(mape(t_labels[,1], t_predictions[,1])*100, digits=3, nsmall=2)
m <- format(mape(t_labels[,2], t_predictions[,2])*100, digits=3, nsmall=2)
n <- format(mape(t_labels[,3], t_predictions[,3])*100, digits=3, nsmall=2)
o <- format(mape(t_labels[,4], t_predictions[,4])*100, digits=3, nsmall=2)
p <- format(mape(t_labels[,5], t_predictions[,5])*100, digits=3, nsmall=2)
cat(paste0("\nMAPE on the Validation Data1:\t", l))
MAPE on the Validation Data1: 26.20
cat(paste0("\nMAPE on the Validation Data2:\t", m))
MAPE on the Validation Data2: 18.14
cat(paste0("\nMAPE on the Validation Data3:\t", n))
MAPE on the Validation Data3: 25.22
cat(paste0("\nMAPE on the Validation Data4:\t", o))
MAPE on the Validation Data4: 23.45
cat(paste0("\nMAPE on the Validation Data5:\t", p, "\n"))
MAPE on the Validation Data5: 16.49
# Mean of Training and Validation Data MAPE
meanT <- c(as.double(a), as.double(b), as.double(c), as.double(d), as.double(e))
meanV <- c(as.double(g), as.double(h), as.double(i), as.double(j), as.double(k))
meanT <- c(as.double(l), as.double(m), as.double(n), as.double(o), as.double(p))
cat(paste0("\nMean Training MAPE: ", mean(meanT), "\n"))
Mean Training MAPE: 21.9
cat(paste0("Mean Validation MAPE: ", mean(meanV), "\n"))
Mean Validation MAPE: 21.778
cat(paste0("Mean Test MAPE: ", mean(meanT), "\n"))
Mean Test MAPE: 21.9
data_train <- data.frame(prediction = training_predictions[,1], actual = training_labels[,1])
data_val <- data.frame(prediction = validation_predictions[,1], actual = validation_labels[,1])
data_test <- data.frame(prediction = testing_predictions[,1])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train[]) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 1") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val[,]) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 1") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 1") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 2 -------------------------#
data_train2 <- data.frame(prediction = training_predictions[,2], actual = training_labels[,2])
data_val2 <- data.frame(prediction = validation_predictions[,2], actual = validation_labels[,2])
data_test2 <- data.frame(prediction = testing_predictions[,2])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train2) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 2") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val2) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 2") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test2) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 2") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 3 -------------------------#
data_train3 <- data.frame(prediction = training_predictions[,3], actual = training_labels[,3])
data_val3 <- data.frame(prediction = validation_predictions[,3], actual = validation_labels[,3])
data_test3 <- data.frame(prediction = testing_predictions[,3])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train3) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 3") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val3) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 3") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test3) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 3") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 4 -------------------------#
data_train4 <- data.frame(prediction = training_predictions[,4], actual = training_labels[,4])
data_val4 <- data.frame(prediction = validation_predictions[,4], actual = validation_labels[,4])
data_test4 <- data.frame(prediction = testing_predictions[,4])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train4) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 4") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val4) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 4") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test4) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 4") +
xlab("Case Number") +
ylab("Price in EUR")
#------------------------- 5 -------------------------#
data_train5 <- data.frame(prediction = training_predictions[,5], actual = training_labels[,5])
data_val5 <- data.frame(prediction = validation_predictions[,5], actual = validation_labels[,5])
data_test5 <- data.frame(prediction = testing_predictions[,5])
# Plot der Ergebnisse der Trainingsdaten
ggplot(data_train5) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Training Data 5") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Validierungsdaten
ggplot(data_val5) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
geom_line( aes(x=1:length(actual), y=actual, colour = "Actual Values" )) +
scale_colour_manual( values = c("Predicted Values"="blue", "Actual Values"="red") ) +
labs(title="Predicted and Actual Values for the Validation Data 5") +
xlab("Case Number") +
ylab("Price in EUR")
# Plot der Ergebnisse der Testdaten
ggplot(data_test5) +
geom_line( aes(x=1:length(prediction), y=prediction, colour = "Predicted Values" )) +
labs(title="Prediction for the Test Data 5") +
xlab("Case Number") +
ylab("Price in EUR")